Data processing

library(dplyr)
library(oulad)
library(ggplot2)
# Load every OULAD table used in this analysis in a single call.
data(
  course, assessment, student, vle,
  student_assessment, student_vle, student_registration
)

# Collapse repeated (module, presentation, student, site, date) rows of the
# click log into one row each by summing their click counts.
student_vle_unique <- student_vle %>%
  group_by(code_module, code_presentation, id_student, id_site, date) %>%
  summarise(sum_click = sum(sum_click))

# Only the de-duplicated table is used below, so drop the raw click log.
rm(student_vle)
# Get student-level info across multiple datasets

# Per module presentation: share of non-exam assessments that are TMAs.
tmaRatio <- assessment %>%
  filter(assessment_type != "Exam") %>%
  group_by(code_module, code_presentation) %>%
  summarise(tma = mean(assessment_type == "TMA")) %>%
  ungroup()

# Per student and module presentation: total clicks divided by the number of
# distinct dates on which the student has click records.
moduleStudentClick <- student_vle_unique %>%
  group_by(code_module, code_presentation, id_student) %>%
  summarise(meanDailyClick = sum(sum_click) / n_distinct(date)) %>%
  ungroup()

# merge() keeps only rows matched on both sides by default, so students
# without any click record do not appear in mergedData.
mergedData <- merge(
  student, tmaRatio,
  by = c("code_module", "code_presentation")
)
mergedData <- merge(
  mergedData, moduleStudentClick,
  by = c("code_module", "code_presentation", "id_student")
)

# Modules CCC, DDD, EEE and FFF are labelled STEM; all others social sciences.
mergedData$code_module_category <- "Social sciences"
mergedData$code_module_category[
  mergedData$code_module %in% c("CCC", "DDD", "EEE", "FFF")
] <- "STEM"

# Re-code final result and education level as ordered factor variables.
# The education levels are spelled out from lowest to highest instead of being
# picked by position from unique(), whose order depends on the row order of
# the merged data and can silently change the factor ordering.
uniq_levels <- unique(mergedData$highest_education)
education_levels <- c(
  "No Formal quals",
  "Lower Than A Level",
  "A Level or Equivalent",
  "HE Qualification",
  "Post Graduate Qualification"
)
# Fail loudly if the data contain a label not listed above; otherwise factor()
# would quietly turn those rows into NA.
unexpected_levels <- setdiff(
  as.character(uniq_levels),
  c(education_levels, NA)
)
if (length(unexpected_levels) > 0) {
  stop(
    "Unexpected highest_education value(s): ",
    paste(unexpected_levels, collapse = ", "),
    call. = FALSE
  )
}
mergedData$highest_education <- factor(
  mergedData$highest_education,
  levels = education_levels,
  ordered = TRUE
)
mergedData$final_result <- factor(
  mergedData$final_result,
  levels = c("Withdrawn", "Fail", "Pass", "Distinction"),
  ordered = TRUE
)

Sex vs. final result

Female students tend to do better in social science classes, while male students tend to do better in STEM classes.

library(plotly)
library(tidyr) 
library(dplyr)
# Histogram of final_result counts, coloured by gender.
# `range` is an axis attribute, so it is set inside `yaxis`; passed directly
# to layout() it is not a valid attribute and has no effect on the axis.
figure1 <- plot_ly(mergedData, x = ~final_result, color = ~gender) %>%
  add_histogram() %>%
  layout(
    title = "Gender and Learning outcome",
    xaxis = list(type = "category", title = "Learning outcome"),
    yaxis = list(title = "Total number of learners", range = c(0, 30000))
  )
figure1

Age vs. final result

Younger students (<35) tend to do better, in both social science and STEM classes.

# Histogram of final_result counts, coloured by age band.
# `range` is an axis attribute, so it is set inside `yaxis`; passed directly
# to layout() it triggers "'layout' objects don't have these attributes:
# 'range'" and has no effect on the axis.
figure2 <- plot_ly(mergedData, x = ~final_result, color = ~age_band) %>%
  add_histogram() %>%
  layout(
    title = "Age and Learning outcome",
    xaxis = list(type = "category", title = "Learning outcome"),
    yaxis = list(title = "Total number of learners", range = c(0, 30000))
  )
figure2
## Warning: 'layout' objects don't have these attributes: 'range'
## Valid attributes include:
## 'font', 'title', 'titlefont', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'template', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'barmode', 'bargap', 'mapType'

Education level and final result

It seems that higher prior education levels are associated with better class outcomes, especially in STEM classes. In the heat map, red means we observe more students with a particular combination of education level and final result than expected by chance.

# Histogram of final_result counts, coloured by highest prior education level.
# `range` is an axis attribute, so it is set inside `yaxis`; passed directly
# to layout() it triggers "'layout' objects don't have these attributes:
# 'range'" and has no effect on the axis.
figure3 <- plot_ly(
  mergedData,
  x = ~final_result,
  color = ~highest_education
) %>%
  add_histogram() %>%
  layout(
    title = "Education Level and Learning outcome",
    xaxis = list(type = "category", title = "Learning outcome"),
    yaxis = list(title = "Total number of learners", range = c(0, 30000))
  )
figure3
## Warning: 'layout' objects don't have these attributes: 'range'
## Valid attributes include:
## 'font', 'title', 'titlefont', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'template', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'barmode', 'bargap', 'mapType'
# Pearson chi-squared test on the cross-tabulation of final result and highest
# prior education. The argument expressions are kept verbatim because
# chisq.test() uses them to label its printed output.
chisqResult <- chisq.test(
  mergedData$final_result,
  mergedData$highest_education
)
chisqResult
## 
##  Pearson's Chi-squared test
## 
## data:  mergedData$final_result and mergedData$highest_education
## X-squared = 906.11, df = 12, p-value < 2.2e-16
# Observed / expected counts for every (final result, education level) cell of
# the chi-squared test; values above 1 mean more students than expected.
ratioMat <- as.data.frame.matrix(chisqResult$observed / chisqResult$expected)
library(ComplexHeatmap)
library(circlize)
# Heatmap() expects a matrix; handing it a data frame makes ComplexHeatmap
# convert the input itself and emit a warning, so convert explicitly.
# Clustering is disabled to keep rows and columns in factor-level order.
# Colour scale: blue at 0, white at 1 (observed == expected), red at 2.9.
Heatmap(
  as.matrix(ratioMat),
  name = "Ratio",
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  col = colorRamp2(c(0, 1, 2.9), c("blue", "white", "red"))
)